This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
library(readxl)
library(readr)
#examplefile<-"C:/Users/sgartiyu/Desktop/R/multiTimelineActuaryvsDatascience.csv"
#input<-read_csv(examplefile)
#examplefilexl<-"C:/Users/sgartiyu/Desktop/R/multiTimelineActuaryvsDatascience.xlsx"
#inputexcel<-read_excel(examplefilexl,sheet="data")
library(ggplot2)
# Scatterplot of area against total population for the `midwest` data,
# coloured by state and sized by population density, with a loess smoother.
#
# The dataset ships with ggplot2, so it is loaded from the package only.
# The earlier download through a URL shortener was overwritten by this call
# anyway and would abort the script whenever the link is unreachable.
data("midwest", package = "ggplot2")

options(scipen = 999) # strongly prefer fixed over scientific notation
theme_set(theme_bw()) # pre-set the bw theme.

# Scatterplot
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "loess", se = FALSE) +
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "Area Vs Population",
    y = "Population",
    x = "Area",
    title = "Scatterplot",
    caption = "Source: midwest"
  )
plot(gg)
http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html#top
# Use the `mpg` dataset bundled with ggplot2. The earlier read.csv() through a
# URL shortener was immediately overwritten by this call, so it only added a
# network dependency that could stop the script.
data(mpg, package = "ggplot2")

# Bar chart: number of rows per manufacturer, stacked by vehicle class.
g <- ggplot(mpg, aes(manufacturer))
g + geom_bar(aes(fill = class), width = 0.5) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(
    title = "Histogram on Categorical Variable",
    subtitle = "Manufacturer across Vehicle Classes"
  )

# Box plot: city mileage per vehicle class.
# varwidth = TRUE (spelled out, since `T` can be reassigned) scales each box's
# width with the number of observations in that class.
g <- ggplot(mpg, aes(class, cty))
g + geom_boxplot(varwidth = TRUE, fill = "plum") +
  labs(
    title = "Box plot",
    subtitle = "City Mileage grouped by Class of vehicle",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )
Example 1: Energy Production Data (https://cran.r-project.org/web/packages/leaflet.minicharts/vignettes/introduction.html)
library(leaflet)
library(leaflet.minicharts)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build one row per area (with its lat/lng) holding the summed 2016 values of
# every remaining column of `eco2mix`, plus two derived columns:
#   renewable     = bioenergy + solar + wind + hydraulic
#   non_renewable = total minus those four sources
# Rows whose area is "France" are excluded before summing.
prod2016 <- eco2mix %>%
  mutate(
    renewable = bioenergy + solar + wind + hydraulic,
    non_renewable = total - bioenergy - solar - wind - hydraulic
  ) %>%
  # keep rows whose month contains "2016" and whose area is not "France"
  filter(grepl("2016", month), area != "France") %>%
  # month is no longer needed and must not be passed to sum()
  select(-month) %>%
  group_by(area, lat, lng) %>%
  summarise_all(sum) %>%
  ungroup()
# Base map shared by the three minichart maps below: a leaflet widget with a
# custom tile layer.
tilesURL <- "http://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}"
basemap <- addTiles(leaflet(width = "100%", height = "400px"), tilesURL)

# Map 1: pie charts of renewable vs. non-renewable production per area.
# Chart width scales with the square root of the area's total, capped at 60.
colors <- c("#4fc13c", "#cccccc")
pie_width <- 60 * sqrt(prod2016$total) / sqrt(max(prod2016$total))
addMinicharts(
  basemap,
  prod2016$lng, prod2016$lat,
  type = "pie",
  chartdata = prod2016[, c("renewable", "non_renewable")],
  colorPalette = colors,
  width = pie_width,
  transitionTime = 0
)

# Map 2: default-type minicharts of the hydraulic, solar and wind columns.
renewable2016 <- select(prod2016, hydraulic, solar, wind)
colors <- c("#3093e5", "#fcba50", "#a0d9e8")
addMinicharts(
  basemap,
  prod2016$lng, prod2016$lat,
  chartdata = renewable2016,
  colorPalette = colors,
  width = 45,
  height = 45
)

# Map 3: a single labelled value (the `load` column) per area.
addMinicharts(
  basemap,
  prod2016$lng, prod2016$lat,
  chartdata = prod2016$load,
  showLabels = TRUE,
  width = 45
)
https://rawgit.com/mages/GIRO2012/master/Using_R_in_Insurance_GIRO_2012.html
library(XML)
library(googleVis)
## Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
##
## Welcome to googleVis version 0.6.4
##
## Please read Google's Terms of Use
## before you start using the package:
## https://developers.google.com/terms/
##
## Note, the plot method of googleVis will by default use
## the standard browser to display its output.
##
## See the googleVis package vignettes for more details,
## or visit https://github.com/mages/googleVis.
##
## To suppress this message use:
## suppressPackageStartupMessages(library(googleVis))
## Source data directly from the web
url <- "http://ds.iris.edu/sm2/eventlist/"

# Parse every HTML table on the page, then keep the one named `evTable`.
eq_tables <- readHTMLTable(
  readLines(url),
  colClasses = c("factor", rep("numeric", 4), "factor")
)
eq <- eq_tables$evTable
names(eq) <- c(
  "DATE", "LAT", "LON", "MAG",
  "DEPTH", "LOCATION_NAME", "IRIS_ID"
)

## Format location data
# "lat:lon" string, used as the location variable of the geo chart
eq$loc <- paste(eq$LAT, eq$LON, sep = ":")
summary(eq)
## DATE LAT LON
## 01-NOV-2019 00:17:21: 1 Min. :-62.85 Min. :-179.78
## 01-NOV-2019 00:30:08: 1 1st Qu.:-15.98 1st Qu.: -69.34
## 01-NOV-2019 02:32:42: 1 Median : 1.44 Median : 111.36
## 01-NOV-2019 02:34:01: 1 Mean : 3.21 Mean : 43.90
## 01-NOV-2019 02:51:52: 1 3rd Qu.: 19.32 3rd Qu.: 128.96
## 01-NOV-2019 04:14:12: 1 Max. : 79.90 Max. : 179.78
## (Other) :797
## MAG DEPTH LOCATION_NAME
## Min. :4.000 Min. : 3.00 NORTHERN MOLUCCA SEA : 60
## 1st Qu.:4.300 1st Qu.: 10.00 MINDANAO, PHILIPPINES: 47
## Median :4.600 Median : 35.00 FIJI ISLANDS REGION : 36
## Mean :4.636 Mean : 88.23 IRIAN JAYA, INDONESIA: 27
## 3rd Qu.:4.900 3rd Qu.:100.50 SOUTHERN IRAN : 21
## Max. :7.100 Max. :643.00 TONGA ISLANDS REGION : 21
## (Other) :591
## IRIS_ID loc
## 11133216: 1 Length:803
## 11133251: 1 Class :character
## 11133272: 1 Mode :character
## 11133313: 1
## 11133325: 1
## 11133332: 1
## (Other) :797
# Display earthquake information of last 30 days
## Create a geo chart with the Google Chart API
# Markers are placed by `loc`, coloured by DEPTH and sized by MAG.
geo_options <- list(
  displayMode = "Markers",
  colorAxis = "{colors:['purple', 'red', 'orange', 'grey']}",
  backgroundColor = "lightblue"
)
G <- gvisGeoChart(
  eq, "loc", "DEPTH", "MAG",
  options = geo_options,
  chartid = "EQ"
)
plot(G)
## starting httpd help server ...
## done
# use leaflet to plot
# One labelled minichart per earthquake, positioned at its lon/lat and
# showing its magnitude.
tilesURL <- "http://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}"
basemap <- addTiles(leaflet(), tilesURL)
addMinicharts(
  basemap,
  eq$LON, eq$LAT,
  chartdata = eq$MAG,
  showLabels = TRUE,
  width = 10
)
library(gsl)
library(mbbefd)
## Loading required package: fitdistrplus
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: survival
## Loading required package: npsurv
## Loading required package: lsei
## Loading required package: alabama
## Loading required package: numDeriv
## Loading required package: Rcpp
## Package: mbbefd
## Version: 0.8.8.5
## Date: 2019-01-02 11:50:03 UTC
## BugReport: http://github.com/spedygiorgio/mbbefd/issues
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Evaluate ecMBBEFD() on a grid from 0 to 1 for the parameter sets returned by
# swissRe(1), swissRe(2) and swissRe(3), then draw the three curves with plotly.
TIVpct <- seq(0, 1, 0.001)

# Curve values on the TIVpct grid for the parameter set swissRe(c).
swiss_re_curve <- function(c) {
  params <- swissRe(c)
  ecMBBEFD(TIVpct, params[["g"]], params[["b"]])
}

trace_1 <- swiss_re_curve(1)
trace_2 <- swiss_re_curve(2)
trace_3 <- swiss_re_curve(3)
data <- data.frame(TIVpct, trace_1, trace_2, trace_3)

plot_ly(
  data,
  x = TIVpct,
  y = trace_1,
  name = "SwissRe1",
  type = 'scatter',
  mode = 'lines'
) %>%
  add_trace(y = trace_2, name = "SwissRe2", mode = 'lines') %>%
  add_trace(y = trace_3, name = "SwissRe3", mode = 'lines')
The following example is taken from the website https://www.guru99.com/r-generalized-linear-model.html
# Load the adult income data set and show its first ten rows.
#
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, but later steps in this document rely
# on factors (as.integer() on every column for the correlation plot, and a
# binomial glm() with `income` as the response).
traindata <- read.csv(
  "https://raw.githubusercontent.com/guru99-edu/R-Programming/master/adult.csv",
  stringsAsFactors = TRUE
)
head(traindata, 10)
## x age workclass education educational.num marital.status
## 1 1 25 Private 11th 7 Never-married
## 2 2 38 Private HS-grad 9 Married-civ-spouse
## 3 3 28 Local-gov Assoc-acdm 12 Married-civ-spouse
## 4 4 44 Private Some-college 10 Married-civ-spouse
## 5 5 18 ? Some-college 10 Never-married
## 6 6 34 Private 10th 6 Never-married
## 7 7 29 ? HS-grad 9 Never-married
## 8 8 63 Self-emp-not-inc Prof-school 15 Married-civ-spouse
## 9 9 24 Private Some-college 10 Never-married
## 10 10 55 Private 7th-8th 4 Married-civ-spouse
## race gender hours.per.week income
## 1 Black Male 40 <=50K
## 2 White Male 50 <=50K
## 3 White Male 40 >50K
## 4 Black Male 40 >50K
## 5 White Female 30 <=50K
## 6 White Male 30 <=50K
## 7 Black Male 40 <=50K
## 8 White Male 32 >50K
## 9 White Female 40 <=50K
## 10 White Male 10 <=50K
names(traindata)
## [1] "x" "age" "workclass"
## [4] "education" "educational.num" "marital.status"
## [7] "race" "gender" "hours.per.week"
## [10] "income"

# Layers shared by the two proportion charts: bars filled to full height so
# each bar shows the income split within a category, with vertical x labels.
income_share_layers <- list(
  geom_bar(position = "fill"),
  theme_classic(),
  theme(axis.text.x = element_text(angle = 90))
)

# Income split by gender
ggplot(traindata, aes(x = gender, fill = income)) +
  income_share_layers

# Income split by marital status
ggplot(traindata, aes(x = marital.status, fill = income)) +
  income_share_layers

# Weekly hours by gender; the blue point marks the group mean.
# NOTE(review): `fun.y` is deprecated in ggplot2 >= 3.3.0 in favour of `fun`;
# left unchanged here to stay compatible with the ggplot2 version in use.
ggplot(traindata, aes(x = gender, y = hours.per.week)) +
  geom_boxplot() +
  stat_summary(
    fun.y = mean,
    geom = "point",
    size = 3,
    color = "steelblue"
  ) +
  theme_classic()
library(GGally)
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library(dplyr)
# Convert data to numeric
# Every column is passed through as.integer() so that all of them can enter
# the correlation matrix.
int_columns <- lapply(traindata, as.integer)
corr <- data.frame(int_columns)

# Plot the graph
# Spearman correlations on pairwise-complete observations, binned into 6
# colour classes and labelled with the coefficient.
ggcorr(
  corr,
  method = c("pairwise", "spearman"),
  nbreaks = 6,
  hjust = 0.8,
  label = TRUE,
  label_size = 3,
  color = "grey50"
)
# setting train and test datasets
# Drop the `x` column before modelling. select() is namespace-qualified
# because MASS (attached as a dependency of mbbefd) also exports a select(),
# so the bare name depends on the order in which packages were attached.
recast_data <- traindata %>%
  dplyr::select(-x)
# NOTE(review): the split performed below is positional rather than random,
# so this seed does not appear to influence it; kept for reproducibility of
# any later random step.
set.seed(1234)
#' Split a data set into its leading "train" rows or trailing "test" rows.
#'
#' The first `floor(size * nrow(data))` rows form the training part and all
#' remaining rows form the test part. The split is purely positional; rows
#' are not shuffled.
#'
#' @param data A data frame (or matrix) to split by rows.
#' @param size Single number in [0, 1]: fraction of rows used for training.
#' @param train If `TRUE` return the training rows, otherwise the test rows.
#' @return The selected rows of `data`, with the same columns as `data`
#'   (a one-column input is not collapsed to a vector).
create_train_test <- function(data, size = 0.8, train = TRUE) {
  stopifnot(
    is.numeric(size), length(size) == 1L, !is.na(size),
    size >= 0, size <= 1
  )
  n_row <- nrow(data)
  # Tiny tolerance so that floating-point noise (e.g. 0.29 * 100 evaluating to
  # 28.999...) does not cost a training row.
  n_train <- floor(size * n_row + sqrt(.Machine$double.eps))
  # A logical mask stays correct when either part is empty. `1:n` would give
  # c(1, 0) for n = 0, and `data[-integer(0), ]` would select no rows at all.
  in_train <- seq_len(n_row) <= n_train
  if (isTRUE(as.logical(train))) {
    data[in_train, , drop = FALSE]
  } else {
    data[!in_train, , drop = FALSE]
  }
}
# Split recast_data with a training fraction of 0.8.
data_train <- create_train_test(recast_data, size = 0.8, train = TRUE)
data_test <- create_train_test(recast_data, size = 0.8, train = FALSE)
dim(data_train)
## [1] 39073 9

# Logistic regression of income on every other column of the training set.
formula <- income ~ .
logit <- glm(
  formula,
  data = data_train,
  family = "binomial"
)
summary(logit)
##
## Call:
## glm(formula = formula, family = "binomial", data = data_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6812 -0.5725 -0.2569 -0.0760 3.2066
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value
## (Intercept) -7.804801 0.262054 -29.783
## age 0.029258 0.001355 21.590
## workclassFederal-gov 1.394048 0.116969 11.918
## workclassLocal-gov 0.780767 0.104153 7.496
## workclassNever-worked -5.722083 70.726167 -0.081
## workclassPrivate 0.876346 0.091215 9.607
## workclassSelf-emp-inc 1.325356 0.112010 11.832
## workclassSelf-emp-not-inc 0.286969 0.101262 2.834
## workclassState-gov 0.587134 0.114957 5.107
## workclassWithout-pay 0.318163 0.855032 0.372
## education11th 0.158776 0.180589 0.879
## education12th 0.528823 0.226839 2.331
## education1st-4th -0.966682 0.414658 -2.331
## education5th-6th -0.421754 0.261480 -1.613
## education7th-8th -0.379423 0.199574 -1.901
## education9th -0.387995 0.232483 -1.669
## educationAssoc-acdm 1.890430 0.151964 12.440
## educationAssoc-voc 1.702838 0.146993 11.584
## educationBachelors 2.605945 0.135557 19.224
## educationDoctorate 3.691513 0.181594 20.328
## educationHS-grad 1.033916 0.134069 7.712
## educationMasters 3.089361 0.142670 21.654
## educationPreschool -1.239897 1.030867 -1.203
## educationProf-school 3.711683 0.170324 21.792
## educationSome-college 1.510526 0.135466 11.151
## educational.num NA NA NA
## marital.statusMarried-AF-spouse 2.329443 0.445473 5.229
## marital.statusMarried-civ-spouse 2.141602 0.056389 37.979
## marital.statusMarried-spouse-absent 0.157250 0.169541 0.928
## marital.statusNever-married -0.415895 0.068571 -6.065
## marital.statusSeparated -0.048994 0.133401 -0.367
## marital.statusWidowed -0.091912 0.125694 -0.731
## raceAsian-Pac-Islander 0.109641 0.198581 0.552
## raceBlack 0.116996 0.188891 0.619
## raceOther 0.022018 0.269879 0.082
## raceWhite 0.391186 0.180182 2.171
## genderMale 0.089880 0.041729 2.154
## hours.per.week 0.030004 0.001330 22.559
## Pr(>|z|)
## (Intercept) < 0.0000000000000002 ***
## age < 0.0000000000000002 ***
## workclassFederal-gov < 0.0000000000000002 ***
## workclassLocal-gov 0.0000000000000656 ***
## workclassNever-worked 0.9355
## workclassPrivate < 0.0000000000000002 ***
## workclassSelf-emp-inc < 0.0000000000000002 ***
## workclassSelf-emp-not-inc 0.0046 **
## workclassState-gov 0.0000003265562993 ***
## workclassWithout-pay 0.7098
## education11th 0.3793
## education12th 0.0197 *
## education1st-4th 0.0197 *
## education5th-6th 0.1068
## education7th-8th 0.0573 .
## education9th 0.0951 .
## educationAssoc-acdm < 0.0000000000000002 ***
## educationAssoc-voc < 0.0000000000000002 ***
## educationBachelors < 0.0000000000000002 ***
## educationDoctorate < 0.0000000000000002 ***
## educationHS-grad 0.0000000000000124 ***
## educationMasters < 0.0000000000000002 ***
## educationPreschool 0.2291
## educationProf-school < 0.0000000000000002 ***
## educationSome-college < 0.0000000000000002 ***
## educational.num NA
## marital.statusMarried-AF-spouse 0.0000001702923404 ***
## marital.statusMarried-civ-spouse < 0.0000000000000002 ***
## marital.statusMarried-spouse-absent 0.3537
## marital.statusNever-married 0.0000000013184768 ***
## marital.statusSeparated 0.7134
## marital.statusWidowed 0.4646
## raceAsian-Pac-Islander 0.5809
## raceBlack 0.5357
## raceOther 0.9350
## raceWhite 0.0299 *
## genderMale 0.0312 *
## hours.per.week < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 42846 on 39072 degrees of freedom
## Residual deviance: 28488 on 39036 degrees of freedom
## AIC: 28562
##
## Number of Fisher Scoring iterations: 10
# Predicted probabilities on the held-out rows
predict <- predict(logit, data_test, type = 'response')
# confusion matrix
# Rows: observed income; columns: whether the predicted probability exceeds
# 0.5. Both column levels are fixed explicitly so the table stays 2 x 2 even
# when every prediction falls on the same side of the threshold. Otherwise
# diag() below would read from a one-column table and the accuracy would be
# wrong.
table_mat <- table(
  data_test$income,
  factor(predict > 0.5, levels = c(FALSE, TRUE))
)
table_mat
##
## FALSE TRUE
## <=50K 6861 504
## >50K 1144 1260
# accuracy Test
# Share of test rows on the diagonal (observed and predicted class agree)
accuracy_Test <- sum(diag(table_mat)) / sum(table_mat)
accuracy_Test
## [1] 0.8313031
https://gist.github.com/mages/3687713/659b2826d429823ff4ddb139d4d1bf46fe794dac https://rawgit.com/mages/GIRO2012/master/Using_R_in_Insurance_GIRO_2012.html
library(ChainLadder)
##
## Welcome to ChainLadder version 0.2.10
##
## Type vignette('ChainLadder', package='ChainLadder') to access
## the overall package documentation.
##
## See demo(package='ChainLadder') for a list of demos.
##
## More information is available on the ChainLadder project web-site:
## https://github.com/mages/ChainLadder
##
## To suppress this message use:
## suppressPackageStartupMessages(library(ChainLadder))
library(googleVis)
# Print the RAA triangle (origin year by development period).
RAA
## dev
## origin 1 2 3 4 5 6 7 8 9 10
## 1981 5012 8269 10907 11805 13539 16181 18009 18608 18662 18834
## 1982 106 4285 5396 10666 13782 15599 15496 16169 16704 NA
## 1983 3410 8992 13873 16141 18735 22214 22863 23466 NA NA
## 1984 5655 11555 15766 21266 23425 26083 27067 NA NA NA
## 1985 1092 9565 15836 22169 25955 26180 NA NA NA NA
## 1986 1513 6445 11702 12935 15852 NA NA NA NA NA
## 1987 557 4020 10946 12314 NA NA NA NA NA NA
## 1988 1351 6947 13112 NA NA NA NA NA NA NA
## 1989 3133 5395 NA NA NA NA NA NA NA NA
## 1990 2063 NA NA NA NA NA NA NA NA NA

# Fit the Mack chain-ladder model to the triangle, show its diagnostic plots
# and print the per-origin and total results.
MCL <- MackChainLadder(Triangle = RAA)
plot(MCL)
MCL
## MackChainLadder(Triangle = RAA)
##
## Latest Dev.To.Date Ultimate IBNR Mack.S.E CV(IBNR)
## 1981 18,834 1.000 18,834 0 0 NaN
## 1982 16,704 0.991 16,858 154 143 0.928
## 1983 23,466 0.974 24,083 617 592 0.959
## 1984 27,067 0.943 28,703 1,636 713 0.436
## 1985 26,180 0.905 28,927 2,747 1,452 0.529
## 1986 15,852 0.813 19,501 3,649 1,995 0.547
## 1987 12,314 0.694 17,749 5,435 2,204 0.405
## 1988 13,112 0.546 24,019 10,907 5,354 0.491
## 1989 5,395 0.336 16,045 10,650 6,332 0.595
## 1990 2,063 0.112 18,402 16,339 24,566 1.503
##
## Totals
## Latest: 160,987.00
## Dev: 0.76
## Ultimate: 213,122.23
## IBNR: 52,135.23
## Mack.S.E 26,880.74
## CV(IBNR): 0.52
class(RAA)
## [1] "triangle" "matrix"

# Long format: one row per (origin, dev) cell with its value.
df <- as.data.frame(RAA)
names(df)
## [1] "origin" "dev" "value"

# One line per origin year; values shown in thousands.
ggplot(
  df,
  aes(x = dev, y = value / 1000, color = origin, group = origin)
) +
  geom_line()
# Load the GenIns triangle, relabel its origin periods as 2002-2011 and
# express the amounts in whole thousands.
data(GenIns)
dimnames(GenIns)$origin <- 2002:2011
GenIns <- round(GenIns / 1000, 0)

# Long-format copy used for plotting.
df2 <- as.data.frame(GenIns)

# Mack chain-ladder fit on the rescaled triangle: diagnostics, then results.
MCL <- MackChainLadder(Triangle = GenIns)
plot(MCL)
MCL
## MackChainLadder(Triangle = GenIns)
##
## Latest Dev.To.Date Ultimate IBNR Mack.S.E CV(IBNR)
## 2002 3,901 1.0000 3,901 0.0 0.0 NaN
## 2003 5,339 0.9828 5,432 93.3 71.4 0.765
## 2004 4,909 0.9129 5,378 468.6 118.4 0.253
## 2005 4,588 0.8662 5,297 708.5 130.5 0.184
## 2006 3,873 0.7975 4,857 983.7 260.3 0.265
## 2007 3,692 0.7225 5,110 1,418.1 409.9 0.289
## 2008 3,483 0.6154 5,659 2,176.5 557.3 0.256
## 2009 2,864 0.4223 6,782 3,918.0 873.9 0.223
## 2010 1,363 0.2417 5,640 4,277.3 970.4 0.227
## 2011 344 0.0693 4,967 4,623.3 1,360.9 0.294
##
## Totals
## Latest: 34,356.00
## Dev: 0.65
## Ultimate: 53,023.29
## IBNR: 18,667.29
## Mack.S.E 2,437.59
## CV(IBNR): 0.13
# Development pattern of each origin year: one line (with points) per origin.
ggplot(
  df2,
  aes(x = dev, y = value, color = origin, group = origin)
) +
  geom_line(size = 1) +
  geom_point() +
  labs(
    title = "Chart 1: Reserving Incurred Development Patterns",
    x = "Development Year",
    y = "Incurred Amount"
  )